Specify the tissue of interest, run the boilerplate code which sets up the functions and environment, load the tissue object.

tissue_of_interest = "Lung"
library(here)
## here() starts at /home/rstudio/tabula-muris
source(here("00_data_ingest", "02_tissue_analysis_rmd", "boilerplate.R"))
## Loading required package: ggplot2
## Loading required package: cowplot
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
## Loading required package: Matrix
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## ── Attaching packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  1.3.4     ✔ purrr   0.2.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## ── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ tidyr::expand()   masks Matrix::expand()
## ✖ dplyr::filter()   masks stats::filter()
## ✖ cowplot::ggsave() masks ggplot2::ggsave()
## ✖ dplyr::lag()      masks stats::lag()
tiss <- load_tissue_facs(tissue_of_interest)
## [1] "Scaling data matrix"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

Visualize top genes in principal components

PCElbowPlot(object = tiss)

# Set number of principal components. 
n.pcs = 20
# Set resolution 
res.used <- 3

tiss <- FindClusters(object = tiss, reduction.type = "pca", dims.use = 1:n.pcs, 
    resolution = res.used, print.output = 0, save.SNN = TRUE)
tiss <- RunTSNE(object = tiss, dims.use = 1:n.pcs, seed.use = 10, perplexity=30)
# note that you can set do.label=T to help label individual clusters
TSNEPlot(object = tiss, do.label = T)

# Batch and animal effects
TSNEPlot(object = tiss, do.return = TRUE, group.by = "plate.barcode")

TSNEPlot(object = tiss, do.return = TRUE, group.by = "mouse.id")

Check expression of genes of interset.

Dotplots let you see the intensity of expression and the fraction of cells expressing for each of your genes of interest.

How big are the clusters?

table(tiss@ident)
## 
##   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17 
## 148 135 125 121 120 113 101  90  85  78  68  65  62  57  53  47  42  40 
##  18  19  20  21  22 
##  39  37  35  30  25

Which markers identify a specific cluster?

#clust.markers <- FindMarkers(object = tiss, ident.1 = 3, only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25)
#print(x = head(x= clust.markers, n = 10))

Assigning cell type identity to clusters

At a coarse level, we can use canonical markers to match the unbiased clustering to known cell types:

# stash current cluster IDs
tiss <- StashIdent(object = tiss, save.name = "cluster.ids")

# enumerate current cluster IDs and the labels for them
cluster.ids <- 0:22
free_annotation <- c(NA,
                     NA,
                     NA,
                     NA,
                     NA,
                     "alveolar epithelial type 1 cells, alveolar epithelial type 2 cells, club cells, and basal cells",
                     NA,
                     "invading monocytes",
                     "dendritic cells, alveolar macrophages, and interstital macrophages",
                     NA,
                     NA,
                     "circulating monocytes",
                     NA,
                     NA,
                     NA,
                     NA,
                     NA,
                     "lung neuroendocrine cells and unknown cells",
                     NA,
                     NA,
                     "mast cells and unknown immune cells",
                     NA,
                     "multiciliated cells")


cell_ontology_class <-c("lung endothelial cell",
                     "lung endothelial cell",
                     "stromal cell",
                     "lung endothelial cell",
                     "lung endothelial cell",
                     "epithelial cell of lung",
                     "stromal cell",
                     "classical monocyte",
                     "myeloid cell",
                     "stromal cell",
                     "lung endothelial cell",
                     "monocyte",
                     "lung endothelial cell",
                     "B cell",
                     "T cell",
                     "stromal cell",
                     "stromal cell",
                     NA,
                     "lung endothelial cell",
                     "natural killer cell",
                     "leukocyte",
                     "stromal cell",
                     "ciliated columnar cell of tracheobronchial tree")

tiss = stash_annotations(tiss, cluster.ids, free_annotation, cell_ontology_class)
data.frame(cell_ontology_class, free_annotation)
##                                cell_ontology_class
## 1                            lung endothelial cell
## 2                            lung endothelial cell
## 3                                     stromal cell
## 4                            lung endothelial cell
## 5                            lung endothelial cell
## 6                          epithelial cell of lung
## 7                                     stromal cell
## 8                               classical monocyte
## 9                                     myeloid cell
## 10                                    stromal cell
## 11                           lung endothelial cell
## 12                                        monocyte
## 13                           lung endothelial cell
## 14                                          B cell
## 15                                          T cell
## 16                                    stromal cell
## 17                                    stromal cell
## 18                                            <NA>
## 19                           lung endothelial cell
## 20                             natural killer cell
## 21                                       leukocyte
## 22                                    stromal cell
## 23 ciliated columnar cell of tracheobronchial tree
##                                                                                    free_annotation
## 1                                                                                             <NA>
## 2                                                                                             <NA>
## 3                                                                                             <NA>
## 4                                                                                             <NA>
## 5                                                                                             <NA>
## 6  alveolar epithelial type 1 cells, alveolar epithelial type 2 cells, club cells, and basal cells
## 7                                                                                             <NA>
## 8                                                                               invading monocytes
## 9                               dendritic cells, alveolar macrophages, and interstital macrophages
## 10                                                                                            <NA>
## 11                                                                                            <NA>
## 12                                                                           circulating monocytes
## 13                                                                                            <NA>
## 14                                                                                            <NA>
## 15                                                                                            <NA>
## 16                                                                                            <NA>
## 17                                                                                            <NA>
## 18                                                     lung neuroendocrine cells and unknown cells
## 19                                                                                            <NA>
## 20                                                                                            <NA>
## 21                                                             mast cells and unknown immune cells
## 22                                                                                            <NA>
## 23                                                                             multiciliated cells
TSNEPlot(object = tiss, do.return = TRUE, group.by = "free_annotation", do.label = T, no.legend = T)
## Warning: Removed 1 rows containing missing values (geom_text).

TSNEPlot(object = tiss, do.return = TRUE, group.by = "cell_ontology_class", do.label = T, no.legend = T)
## Warning: Removed 1 rows containing missing values (geom_text).

Subcluster

subtiss = SubsetData(tiss, ident.use = c(5,17,22))
subtiss <- subtiss %>% ScaleData() %>% 
  FindVariableGenes(do.plot = TRUE, x.high.cutoff = Inf, y.cutoff = 0.5) %>%
  RunPCA(do.print = FALSE) %>% ProjectPCA(do.print = FALSE)
## [1] "Scaling data matrix"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

PCHeatmap(object = subtiss, pc.use = 1:9, cells.use = 100, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)

PCElbowPlot(subtiss)

sub.n.pcs = 10
sub.res.use = 1
subtiss <- subtiss %>% FindClusters(reduction.type = "pca", dims.use = 1:sub.n.pcs, 
    resolution = sub.res.use, print.output = 0, save.SNN = TRUE) %>%
    RunTSNE(dims.use = 1:sub.n.pcs, seed.use = 10, perplexity=30)

TSNEPlot(object = subtiss, do.label = T, pt.size = 1.2, label.size = 4)

Epithelial clusters (besides Type II and Ciliated, where n is sufficently high) do not align with known marker gene expression because of low numbers. Cannot subcluster further!

Subcluster Myeloid cells

subtiss2 = SubsetData(tiss, ident.use = c(8, 7, 11))
subtiss2 <- subtiss2 %>% ScaleData() %>% 
  FindVariableGenes(do.plot = TRUE, x.high.cutoff = Inf, y.cutoff = 1, x.low.cutoff = 0.5) %>%
  RunPCA(do.print = FALSE) %>% ProjectPCA(do.print = FALSE)
## [1] "Scaling data matrix"
## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |=================================================================| 100%

PCHeatmap(object = subtiss2, pc.use = 1:9, cells.use = 100, do.balanced = TRUE, label.columns = FALSE, num.genes = 8)

PCElbowPlot(subtiss2)

sub2.n.pcs = 13
sub2.res.use = 2
subtiss2 <- subtiss2 %>% FindClusters(reduction.type = "pca", dims.use = 1:sub2.n.pcs, 
    resolution = sub2.res.use, print.output = 0, save.SNN = TRUE) %>%
    RunTSNE(dims.use = 1:sub2.n.pcs, seed.use = 10, perplexity=30)

TSNEPlot(object = subtiss2, do.label = T, pt.size = 1.2, label.size = 4)

TSNEPlot(object = tiss, do.label = TRUE, pt.size = 0.5, group.by='free_annotation')
## Warning: Removed 1 rows containing missing values (geom_text).

Myeloid clusters (besides dendritic and two monocyte clusters, where n is sufficently high) do not align with known marker gene expression because of low numbers. Cannot subcluster further!

Save the Robject for later

When you save the annotated tissue, please give it a name.

filename = here('00_data_ingest', '04_tissue_robj_generated', 
                     paste0("facs_", tissue_of_interest, "_seurat_tiss.Robj"))
print(filename)
## [1] "/home/rstudio/tabula-muris/00_data_ingest/04_tissue_robj_generated/facs_Lung_seurat_tiss.Robj"
save(tiss, file=filename)
# To reload a saved object
# filename = here('00_data_ingest', '04_tissue_robj_generated', 
#                      paste0("facs_", tissue_of_interest, "_seurat_tiss.Robj"))
# load(file=filename)

Export the final metadata

Write the cell ontology and free annotations to CSV.

save_annotation_csv(tiss, tissue_of_interest, "facs")